# -*- coding:utf-8 -*-
# Author:凌逆战 | Never
# Date: 
"""
使用非对称的进行分析合成
"""
import numpy as np
import soundfile as sf
import scipy.signal as signal
import librosa


def getAsqrtAnalysisWindow(N, M, d):
    """Build the asymmetric square-root-Hann analysis window.

    Layout of the returned length-``N`` array:
      * ``d`` leading zeros,
      * a rising sqrt-Hann slope over the next ``N - M - d`` samples,
      * a falling sqrt-Hann slope over the final ``M`` samples.

    Args:
        N: total window (filter) length.
        M: length of the falling tail.
        d: number of leading zero samples.

    Returns:
        1-D float array of length ``N``.
    """
    rise_len = N - M - d
    # First half of an odd-length Hann: starts at 0 and stops one sample
    # short of the peak.
    rise = np.sqrt(np.hanning(2 * rise_len + 1)[:rise_len])
    # Hann of length 2*M (trailing zero trimmed); its last M samples start
    # at the peak (1.0) and fall towards zero.
    fall = np.sqrt(np.hanning(2 * M + 1)[:2 * M])[-M:]

    window = np.zeros(N)  # the first d samples simply stay zero
    window[d:N - M] = rise
    window[N - M:] = fall
    return window


def getAsqrtSynthesisWindow(N, M, d):
    """Build the asymmetric synthesis window matching getAsqrtAnalysisWindow.

    The window is zero except for its last ``2 * M`` samples:
      * ``[-2*M:-M]``: a rising Hann slope divided by the corresponding part
        of the analysis window's rising sqrt-Hann slope, so that
        analysis * synthesis equals a plain Hann rise there;
      * ``[-M:]``: a falling sqrt-Hann slope (same as the analysis tail).

    Args:
        N: total window (filter) length.
        M: length of each of the two non-zero segments.
        d: number of leading zeros used by the analysis window.

    Returns:
        1-D float array of length ``N``.

    Raises:
        ValueError: if ``M < 1``, ``d < 0`` or ``N < 2 * M + d``; in that
            case the normalised segment would not fit inside the rising
            part of the analysis window.
    """
    rise_len = N - M - d  # length of the analysis window's rising slope
    if M < 1 or d < 0 or rise_len < M:
        raise ValueError(
            f"invalid window parameters: need M >= 1, d >= 0 and "
            f"N >= 2*M + d (got N={N}, M={M}, d={d})"
        )

    hann = np.hanning(2 * M + 1)[:2 * M]
    # Last M samples of the analysis rise: the part that overlaps the
    # synthesis window's rising segment.
    analysis_rise_tail = np.sqrt(np.hanning(2 * rise_len + 1)[:rise_len])[rise_len - M:]

    # When N == 2*M + d the first denominator sample is exactly 0 (and so is
    # the numerator). Leave that sample at 0 - the limit of hann / sqrt(hann)
    # - instead of producing NaN from 0/0.
    rise = np.zeros(M)
    np.divide(hann[:M], analysis_rise_tail, out=rise, where=analysis_rise_tail > 0)

    window = np.zeros(N)
    window[N - 2 * M:N - M] = rise
    window[N - M:] = np.sqrt(hann[M:])
    return window


# Frame geometry in samples: FFT size, window length and hop between frames.
NFFT, window_len, hop_size = 512, 512, 32
# One hop of new samples is consumed per frame. stft_analysis writes each
# incoming frame into the last hop_size slots of its buffer, so frame_len is
# derived from hop_size instead of being a second, independent constant.
frame_len = hop_size
overlap_size = window_len - hop_size
N = NFFT
M = hop_size * 2  # latency equals 2*M - hop_size
d = 0
window_analysis = getAsqrtAnalysisWindow(N, M, d=d)
window_synthesis = getAsqrtSynthesisWindow(N, M, d=d)

# Persistent streaming state shared across calls: analysis input history and
# synthesis overlap-add accumulator.
p_ana_buf = np.zeros(window_len)
syn_buf = np.zeros(window_len)


def stft_analysis(in_buf):
    """Push one hop of samples into the analysis buffer and return its spectrum.

    Mutates the module-level ``p_ana_buf``: the existing content is shifted
    left by ``hop_size`` samples and ``in_buf`` is written into the freed
    slots at the end.

    Args:
        in_buf: the new samples; must be assignable to the last
            ``hop_size`` slots of ``p_ana_buf``.

    Returns:
        One-sided complex spectrum of the windowed buffer, as produced by
        ``np.fft.rfft``.
    """
    # Slide the history left by one hop, then append the fresh samples.
    p_ana_buf[:overlap_size] = p_ana_buf[hop_size:]
    p_ana_buf[overlap_size:] = in_buf

    # Apply the analysis window and go from time domain to frequency domain.
    windowed_frame = np.multiply(p_ana_buf, window_analysis)
    return np.fft.rfft(windowed_frame)


def stft_synthesis(spec):
    """Overlap-add one spectrum into the synthesis buffer and return one hop.

    Mutates the module-level ``syn_buf``: it is shifted left by ``hop_size``
    samples, the freed tail is cleared, and the windowed inverse FFT of
    ``spec`` is added on top.

    Args:
        spec: one-sided complex spectrum, as returned by ``stft_analysis``.

    Returns:
        A new array holding ``hop_size`` output samples. No gain
        compensation is applied here; the caller is expected to divide by
        ``M / hop_size``.
    """
    # Frequency domain -> time domain; pin the length to the window length
    # rather than relying on irfft inferring it from the number of bins.
    frame = np.fft.irfft(spec, n=window_len)
    frame *= window_synthesis

    # Overlap-add: advance the accumulator by one hop and add the new frame.
    syn_buf[:overlap_size] = syn_buf[hop_size:]
    syn_buf[overlap_size:] = 0
    np.add(syn_buf, frame, out=syn_buf)

    # The synthesis window is non-zero only over its last 2*M samples, so
    # the first hop of that region receives no contribution from later
    # frames and is ready to be emitted.
    start = window_len - 2 * M
    # Copy so the caller's data is not overwritten by the next call.
    return syn_buf[start:start + hop_size].copy()


def main(in_path="./TIMIT.wav", out_path="./out.wav"):
    """Run the streaming analysis/synthesis chain over a wav file.

    Reads ``in_path``, feeds it frame by frame through ``stft_analysis`` and
    ``stft_synthesis``, compensates the overlap-add gain and writes the
    result to ``out_path`` with the input's sample rate. Trailing samples
    that do not fill a whole frame are dropped.

    Args:
        in_path: path of the input audio file.
        out_path: path of the output audio file.
    """
    # 1. Read the audio.
    wav, wav_sr = sf.read(in_path, dtype="float32")
    frame_num = len(wav) // frame_len

    # Preallocate the output instead of growing a Python list one sample at
    # a time; slice assignment copies the values out of each returned frame.
    out_wav = np.zeros(frame_num * hop_size)
    for i in range(frame_num):
        start = i * frame_len
        # 2. Analysis (STFT) of the next frame.
        spec = stft_analysis(wav[start:start + frame_len])
        # 3. Synthesis (inverse STFT) yields one hop of output.
        out_wav[i * hop_size:(i + 1) * hop_size] = stft_synthesis(spec)

    # Undo the overlap-add gain of M / hop_size (a no-op when M == hop_size).
    out_wav /= M / hop_size
    print(out_wav.shape)
    # 4. Write the audio.
    sf.write(out_path, out_wav, wav_sr)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
